import scrapy
from bs4 import BeautifulSoup

from ..items import MyspiderItem


class DoubanSpider(scrapy.Spider):
    """Spider that scrapes book entries from Douban's Top 250 books list.

    Only the URLs in ``start_urls`` are fetched; ``parse`` does not follow
    pagination links. Each ``<tr class="item">`` row on a fetched page is
    turned into one ``MyspiderItem`` with ``name``, ``info`` and ``quote``.
    """

    name = "douban"
    # Must be a bare domain (no path): Scrapy's offsite filtering compares
    # these entries against request hostnames, so an entry that contains a
    # path such as "/top250" can never match.
    allowed_domains = ['book.douban.com']
    start_urls = ['https://book.douban.com/top250']

    @staticmethod
    def _text_or_default(tag, default=""):
        """Return ``tag.text``, or *default* when the lookup found no tag."""
        return default if tag is None else tag.text

    def parse(self, response, **kwargs):
        """Parse a list page and yield one item per book row.

        Args:
            response: The downloaded page; its ``text`` is parsed as HTML.
            **kwargs: Extra keyword arguments; not used here.

        Yields:
            MyspiderItem: ``name`` is the ``title`` attribute of the row's
            second ``<a>`` tag, ``info`` is the text of ``<p class="pl">``
            and ``quote`` is the text of ``<span class="inq">``. ``info``
            and ``quote`` are empty strings when their tag is absent. Rows
            without a usable title link are skipped with a warning.
        """
        # Build the DOM tree from the response body.
        soup = BeautifulSoup(response.text, "html.parser")

        # Each book on the page lives in its own <tr class="item"> row.
        for row in soup.find_all("tr", class_="item"):
            links = row.find_all("a")
            # The title is read from the second link in the row; guard
            # against rows that do not have that shape instead of raising
            # and losing every remaining row on the page.
            if len(links) < 2 or not links[1].has_attr("title"):
                self.logger.warning(
                    "Skipping row without a title link on %s", response.url
                )
                continue

            item = MyspiderItem()
            item["name"] = links[1]["title"]
            # find() returns None when the tag is missing (not every row is
            # guaranteed to carry a quote), so fall back to "".
            item['info'] = self._text_or_default(row.find("p", class_="pl"))
            item['quote'] = self._text_or_default(row.find("span", class_="inq"))

            # Hand the item back to the Scrapy engine.
            yield item


